import os
from dotenv import load_dotenv, find_dotenv
from langchain import HuggingFaceHub
from langchain import PromptTemplate, LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import YoutubeLoader
import textwrap


# --------------------------------------------------------------
# Loading the HuggingFaceHub API token from the .env file
# --------------------------------------------------------------


# Load the entries of the .env file found by find_dotenv() into the
# environment, then read the token. Indexing os.environ (instead of .get)
# raises KeyError right here when the variable is not set.
load_dotenv(dotenv_path=find_dotenv())
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]


# --------------------------------------------------------------
# Loading the LLM model from the HuggingFaceHub
# --------------------------------------------------------------

# See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for other options.
# Here we load the instruction-tuned 'Falcon-7B-Instruct' model.


# Hugging Face Hub repository id of the model to query.
repo_id = "tiiuae/falcon-7b-instruct"

# Generation settings forwarded to the model: temperature 0.1 and at most
# 500 newly generated tokens per call.
falcon_llm = HuggingFaceHub(
    model_kwargs=dict(temperature=0.1, max_new_tokens=500),
    repo_id=repo_id,
)


# --------------------------------------------------------------
# Creating a PromptTemplate and LLMChain
# --------------------------------------------------------------


# Prompt text with a single {question} placeholder; the answer is primed
# with a "think step by step" lead-in.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

# Bind the template to its one input variable, then pair it with the LLM.
prompt = PromptTemplate(input_variables=["question"], template=template)
llm_chain = LLMChain(llm=falcon_llm, prompt=prompt)


# --------------------------------------------------------------
# Running the LLMChain
# --------------------------------------------------------------


question = "How do I make a sandwich?"

# Send the question through the prompt template to the model.
response = llm_chain.run(question)

# Wrap each line of the response separately. With replace_whitespace=False,
# textwrap keeps embedded newlines but still counts them as part of the
# current line, so wrapping the whole response in one call breaks lines at
# odd positions. Splitting first keeps the model's own line breaks and wraps
# every paragraph cleanly at 100 columns.
wrapped_text = "\n".join(
    textwrap.fill(
        line, width=100, break_long_words=False, replace_whitespace=False
    )
    for line in response.splitlines()
)

print("\n----- ----- ----- ----- ----- ----- ----- ----- ----- ----- \n")
print(f"Prompt --> {question}\n")
print("Falcon-7B -->")
print(wrapped_text)
print("\n----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ")


# --------------------------------------------------------------
# Loading a video transcript from YouTube
# --------------------------------------------------------------


# Alternative video: "https://www.youtube.com/watch?v=ukj_ITJKBwE&t=287s"
video_url = "https://www.youtube.com/watch?v=egDIqKLt2L4&t=4081s"

# Splitter configured with chunk_size=3000; other settings are left at the
# library defaults.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000)

# Load the video's transcript as documents, then split them into chunks.
loader = YoutubeLoader.from_youtube_url(video_url)
transcript = loader.load()
docs = text_splitter.split_documents(transcript)


# --------------------------------------------------------------
# Summarization with LangChain
# --------------------------------------------------------------


# Build a "map_reduce" summarization chain on top of the Falcon LLM;
# verbose=True is passed so the chain logs its progress while running.
chain = load_summarize_chain(
    llm=falcon_llm,
    chain_type="map_reduce",
    verbose=True,
)
# (Optional) inspect the prompt used on each chunk:
#     print(chain.llm_chain.prompt.template)
# (Optional) inspect the prompt used when combining the partial summaries:
#     print(chain.combine_document_chain.llm_chain.prompt.template)


# --------------------------------------------------------------
# Testing the Falcon model with text summarization
# --------------------------------------------------------------


# Run the summarization chain over the transcript chunks.
output_summary = chain.run(docs)

# Wrap each line of the summary separately. With replace_whitespace=False,
# textwrap keeps embedded newlines but still counts them as part of the
# current line, so wrapping the whole summary in one call breaks lines at
# odd positions. Splitting first keeps the model's own line breaks and wraps
# every paragraph cleanly at 100 columns.
wrapped_text = "\n".join(
    textwrap.fill(
        line, width=100, break_long_words=False, replace_whitespace=False
    )
    for line in output_summary.splitlines()
)

print("\n----- ----- ----- ----- ----- ----- ----- ----- ----- ----- \n")
print(f"Youtube Video Link --> {video_url}\n")
print("Falcon-7B Summary --> \n")
print(wrapped_text)
print("\n----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ")
